## Time plots: autoplot() draws a tsibble's measurements against its index.
# No variable is named in these calls, so autoplot() picks one itself and
# reports its choice in the message printed after each call.
feasts::autoplot(arrival_daily) # time plot of the daily data
## Plot variable not specified, automatically selected `.vars = arrival`
## `mutate_if()` ignored the following grouping variables:
autoplot(arrival_hourly) # time plot of the hourly data
## Plot variable not specified, automatically selected `.vars = arrival`
feasts::autoplot(total_arrival_hourly)
## Plot variable not specified, automatically selected `.vars = arrival`
autoplot(total_arrival_daily)
## Plot variable not specified, automatically selected `.vars = arrival`
You can use filter_index(), or head() and tail(), to select a subset of a time series; try ?filter_index or search online for examples.
# Plot subsets of the series. filter_index() selects rows by time period:
# a bare string selects that period only, and a one-sided formula with `.`
# leaves the start (`. ~ end`) or the end (`start ~ .`) of the range open.
total_arrival_daily %>%
  tsibble::filter_index("2016-02") %>%
  autoplot(arrival) # plot only February 2016
total_arrival_daily %>%
  tsibble::filter_index("2016-02" ~ .) %>%
  autoplot(arrival) # plot from February 2016 till the end
total_arrival_daily %>%
  tsibble::filter_index(. ~ "2016-02") %>%
  autoplot(arrival) # plot from the start of the series up to February 2016
total_arrival_hourly %>%
  tsibble::filter_index("2015-01-01") %>%
  autoplot(arrival) # plot only 2015-01-01
## Warning: Argument 'roll' is deprecated. Deprecated in version '1.8.4'.
## Warning: Argument 'roll' is deprecated. Deprecated in version '1.8.4'.
# head() and tail() select by position instead of by date.
total_arrival_daily %>% head(n = 100) %>% autoplot() # first 100 observations
## Plot variable not specified, automatically selected `.vars = arrival`
total_arrival_daily %>% tail(n = 100) %>% autoplot() # last 100 observations
## Plot variable not specified, automatically selected `.vars = arrival`
It can be really hard to visualise hourly or daily data. One possibility is to view only a portion of the data; you can also use interactive plots:
# Interactive plots: convert each tsibble to an xts object with
# tsbox::ts_xts(), draw it with dygraph(), and add a range selector whose
# initial zoom is given by `dateWindow`.
# NOTE(review): both windows are set in January 2010, while other examples in
# this file select dates in 2015 and 2016 -- confirm the series cover 2010.
total_arrival_daily %>%
  tsbox::ts_xts() %>%
  dygraph() %>%
  dyRangeSelector(dateWindow = c("2010-01-01", "2010-02-01"))
## [time]: 'date' [value]: 'arrival'
total_arrival_hourly %>%
  tsbox::ts_xts() %>%
  dygraph() %>%
  dyRangeSelector(dateWindow = c("2010-01-01", "2010-01-02"))
## [value]: 'arrival'
You can also plot monthly, weekly or quarterly time series using the ggplot2 package; see below for an example:
# Aggregate the daily series to monthly totals: index_by() regroups the rows
# by the year-month of `date`, and summarise() sums `arrival` in each group.
monthly_admissions <- total_arrival_daily %>%
  index_by(month = yearmonth(date)) %>%
  summarise(arrival = sum(arrival))

# Time plot of the monthly totals with explicit axis labels and titles.
autoplot(monthly_admissions, arrival) +
  labs(
    title = "Monthly A&E arrival",
    subtitle = "UK hospital",
    x = "Month",
    y = "arrival"
  )
Use seasonal and subseries plots to check whether the series contain seasonality:
# Seasonal plot and subseries plot of `arrival` for the daily series, both
# using the default seasonal period.
feasts::gg_season(total_arrival_daily, arrival)
feasts::gg_subseries(total_arrival_daily, arrival)
You can also try this with the hourly series; change the “period = …” argument to look at seasonality over different periods:
# Seasonal plots with the seasonal period set explicitly through `period`.
feasts::gg_season(total_arrival_hourly, arrival, period = "day")
feasts::gg_season(total_arrival_daily, arrival, period = "week")
gg_season(total_arrival_hourly, arrival, period = "week") # change period
Is there any seasonality in the daily time series? What about the hourly and monthly series?
How do you create a seasonal plot for the weekly, monthly and quarterly series?
# Weekly totals: regroup the daily rows by the year-week of `date` and sum
# `arrival` within each week.
weekly_admissions <- total_arrival_daily %>%
  index_by(week = yearweek(date)) %>%
  summarise(arrival = sum(arrival))

# Seasonal and subseries plots of the weekly totals. No variable is named,
# so each function selects one and prints the message shown below the call.
weekly_admissions %>% gg_season()
## Plot variable not specified, automatically selected `y = arrival`
weekly_admissions %>% gg_subseries()
## Plot variable not specified, automatically selected `y = arrival`
# Seasonal and subseries plots of the monthly totals, with the plotted
# variable selected automatically (see the messages below each call).
monthly_admissions %>% gg_season()
## Plot variable not specified, automatically selected `y = arrival`
monthly_admissions %>% gg_subseries()
## Plot variable not specified, automatically selected `y = arrival`
# Quarterly totals: regroup the daily rows by the year-quarter of `date` and
# sum `arrival` within each quarter.
quarterly_admissions <- total_arrival_daily %>%
  index_by(quarter = yearquarter(date)) %>%
  summarise(arrival = sum(arrival))

# Seasonal and subseries plots of the quarterly totals, with the plotted
# variable selected automatically (see the messages below each call).
quarterly_admissions %>% gg_season()
## Plot variable not specified, automatically selected `y = arrival`
quarterly_admissions %>% gg_subseries()
## Plot variable not specified, automatically selected `y = arrival`
# Lag plots of `arrival` against itself for lags 1 to 14, drawn as points.
feasts::gg_lag(total_arrival_daily, arrival, lags = 1:14, geom = "point")
# Autocorrelation function up to lag 14. No response variable is named, so
# ACF() selects one and reports it in the message below.
feasts::ACF(total_arrival_daily, lag_max = 14)
## Response variable not specified, automatically selected `var = arrival`
## # A tsibble: 14 x 2 [1D]
## lag acf
## <cf_lag> <dbl>
## 1 1D 0.428
## 2 2D 0.177
## 3 3D 0.173
## 4 4D 0.162
## 5 5D 0.166
## 6 6D 0.323
## 7 7D 0.465
## 8 8D 0.292
## 9 9D 0.0905
## 10 10D 0.0919
## 11 11D 0.0820
## 12 12D 0.0734
## 13 13D 0.246
## 14 14D 0.390
Plot the autocorrelation function:
# Correlograms: compute the ACF, then hand the result to autoplot().
total_arrival_hourly %>%
  ACF(lag_max = 48) %>%
  autoplot() # ACF of the hourly series, up to lag 48
## Response variable not specified, automatically selected `var = arrival`
total_arrival_daily %>%
  ACF(lag_max = 14) %>%
  autoplot() # ACF of the daily series, up to lag 14
## Response variable not specified, automatically selected `var = arrival`
You can have a time plot, an ACF plot and a seasonal plot in a single figure if
you use the gg_tsdisplay() function:
# gg_tsdisplay() draws a combined display for a series; check ?gg_tsdisplay.
# The hourly series is restricted to gender == "female" before plotting,
# while the daily series is plotted as is.
# Commented-out variant that applies the same gender filter to the daily
# series:
# total_arrival_daily %>% filter(gender == "female") %>% gg_tsdisplay()
total_arrival_hourly %>%
  filter(gender == "female") %>%
  gg_tsdisplay()
## Plot variable not specified, automatically selected `y = arrival`
gg_tsdisplay(total_arrival_daily)
## Plot variable not specified, automatically selected `y = arrival`
You can use the Ljung-Box test to check whether the autocorrelation is significant: if the p-value is small (well below 0.05), there is significant autocorrelation:
# Ljung-Box test on `arrival` for the daily series. No extra arguments are
# passed, so ljung_box() runs with its default settings.
features(total_arrival_daily, arrival, ljung_box)
## # A tibble: 1 × 2
## lb_stat lb_pvalue
## <dbl> <dbl>
## 1 419. 0
What will autocorrelation tell us? Which key features could be highlighted by the ACF?
You can create any graph that helps you to better understand the data! I recommend that you look at the distributions of your variables; try geom_boxplot(), geom_histogram() and geom_density(), which are helpful for understanding the variation.
Here I tried to see whether the arrivals of males or females differ at the weekend compared with weekdays:
# Build an hourly table for the "female" gender group with calendar helper
# columns, so that weekend and weekday arrival profiles can be compared.
weekend_an_weekday <- total_arrival_hourly %>%
  group_by(gender) %>%
  summarise(arrival = sum(arrival)) %>%
  mutate(
    Date = lubridate::as_date(time),
    hour = lubridate::hour(time),
    Day = lubridate::wday(time, label = TRUE),
    # Flag weekends from the numeric weekday rather than the printed label:
    # labels such as "Sat"/"Sun" change with the session locale, whereas with
    # week_start = 1 Saturday is always 6 and Sunday is always 7.
    Weekend = lubridate::wday(time, week_start = 1) >= 6
  ) %>%
  filter(gender == "female")

# One line per calendar date with hour of day on the x-axis; rows of panels
# are split by the Weekend flag and each panel gets its own y scale.
weekend_an_weekday %>%
  ggplot(aes(x = hour, y = arrival)) +
  geom_line(aes(group = Date)) +
  facet_grid(Weekend ~ ., scales = "free_y")